library(data.table)
library(ggplot2)
library(geosphere)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6

Load and set up data

# North American data
trawl <- readRDS("data/NorthAmerican_trawl_data_full.rds")

# make a tow number so that we can easily eliminate duplicate comparisons (1->2 and 2->1)
# as opposed to haulid that is a factor
trawl[, townum := .GRP, by = haulid]

Calculate distance between tows for each year in each region

# fix lons < -180
trawl[lon < -180 , lon := lon + 360]

# loop through each region and year
# adds all results to long table "dist"
regs <- trawl[, sort(unique(region))]
for(reg in regs){
  yrs <- trawl[region == reg, sort(unique(year))]
  
  for(yr in yrs){
    # geographic distance between tows
    distgeo <- distm(trawl[region == reg & year == yr, .(townum, lon, lat)][!duplicated(townum), .(lon, lat)]) # geographic distance matrix
    rownames(distgeo) <- colnames(distgeo) <- trawl[region == reg & year == yr, .(townum)][!duplicated(townum), townum]
    distgeo.l <- data.table(melt(as.matrix(distgeo), varnames = c("tow1", "tow2"), value.name = "distgeo")) #matrix to long data.table
    distgeo.l <- distgeo.l[tow1 > tow2,] # get rid of repetitions and self-comparisons
    
    # create community matrix
    commat <- dcast(trawl[region == reg & year == yr, .(pres = 1, townum, spp)], townum ~ spp, value.var = "pres", fun.aggregate = length) #long to wide data for community matrix, column names are townum then each spp This adds zeros automatically through length()
    
    # community dissimilarity distances between tows
    distcom <- as.matrix(vegdist(commat[,2:ncol(commat)], method = "jaccard", binary = TRUE)) #dissimilarity 
    colnames(distcom) <- rownames(distcom) <- commat$townum
    distcom.l <- data.table(melt(distcom, varnames = c("tow1", "tow2"), value.name = "jaccard_dissimilarity")) # matrix to long data.table
    distcom.l <- distcom.l[tow1 > tow2,] # to get rid of repetitions and self-comparisons
    
    #add year and region for these values
    distcom.l[, year := yr]
    distcom.l[, region := reg]
    
    #merge distance with jaccard_dissimilarity for this year
    thisdist <- merge(distgeo.l , distcom.l, by = c('tow1', 'tow2'))
    
    # add to output table if it already exists
    # create output table if this is the first loop through
    if(reg == regs[1] & yr == yrs[1]){
      dist <- thisdist
    } else {
      dist <- rbind(dist, thisdist)
    }
  }
}

dist #here we have jaccard and geographic distance
##            tow1  tow2   distgeo jaccard_dissimilarity year
##        1:     2     1 14023.161             0.6666667 1983
##        2:     3     1  3303.328             0.3333333 1983
##        3:     3     2 11086.142             0.6956522 1983
##        4:     4     1  2540.554             0.6000000 1983
##        5:     4     2 14172.731             0.9230769 1983
##       ---                                                 
## 13046364: 54931 54926 47378.393             0.7826087 2004
## 13046365: 54931 54927 44670.762             0.7333333 2004
## 13046366: 54931 54928 43403.397             0.7435897 2004
## 13046367: 54931 54929 43403.397             0.7142857 2004
## 13046368: 54931 54930  6855.344             0.6060606 2004
##                         region
##        1:     Aleutian Islands
##        2:     Aleutian Islands
##        3:     Aleutian Islands
##        4:     Aleutian Islands
##        5:     Aleutian Islands
##       ---                     
## 13046364: West Coast Triennial
## 13046365: West Coast Triennial
## 13046366: West Coast Triennial
## 13046367: West Coast Triennial
## 13046368: West Coast Triennial

Plot distance decay for each year

for(reg in regs){
  print(reg)
  distdec_byyr <- ggplot(data = dist[region == reg,], aes(x = distgeo, y = 1-jaccard_dissimilarity)) +
    geom_point(shape = 1, size = 0.5, alpha = 0.005) +
    theme_classic() +
    labs(x = "Distance (m)", y = "Jaccard Similarity") +
    theme(text=element_text(size = 8), 
          axis.text.x = element_text(angle = 90),
          strip.text.x = element_text(size = 5)) +
    geom_smooth() + 
    facet_wrap( ~ year, ncol = 8) + 
    ggtitle(reg)
  
  # display plot: the slow part
  print(distdec_byyr)
  
}
## [1] "Aleutian Islands"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "Eastern Bering Sea"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "Gulf of Alaska"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "Gulf of Mexico"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "Northeast US Fall"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "Northeast US Spring"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "Scotian Shelf"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "Southeast US Fall"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "Southeast US Spring"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "Southeast US Summer"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "West Coast Annual"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## [1] "West Coast Triennial"
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# save plot: also slow
# ggsave(distdec_byyr, path = "plots/", file = "distance_decay_tow_year.png", dpi = 300, width = 8, height = 8, units = 'in')

Extract coefficient (slope) for each year: how does it change through time?

# get slopes
# could do this as quantile regression to get median (as in Magurran et al.)
for(reg in regs){
  yrs <- dist[region == reg, sort(unique(year))]
  for (yr in yrs) {
    # Estimate the parameters using a linear model for this year
    mod <- lm(jaccard_dissimilarity ~ distgeo, data = dist[region == reg & year == yr,])
    if(reg == regs[1] & yr == yrs[1]){
      mod_coefs <- data.table(region = reg, year = yr, beta = coef(mod)[2])
    } else {
      thesecoefs <- data.table(region = reg, year = yr, beta = coef(mod)[2])
      mod_coefs <- rbind(mod_coefs, thesecoefs)
    }
  }
  
}
mod_coefs
##                    region year         beta
##   1:     Aleutian Islands 1983 8.300186e-08
##   2:     Aleutian Islands 1986 3.995557e-08
##   3:     Aleutian Islands 1991 4.739134e-08
##   4:     Aleutian Islands 1994 2.142118e-08
##   5:     Aleutian Islands 1997 2.522780e-08
##  ---                                       
## 320: West Coast Triennial 1992 1.395808e-07
## 321: West Coast Triennial 1995 1.014676e-07
## 322: West Coast Triennial 1998 1.060799e-07
## 323: West Coast Triennial 2001 1.087129e-07
## 324: West Coast Triennial 2004 1.259362e-07

plot the distance decays by year

ggplot(mod_coefs, aes(year, beta, group = region)) +
  geom_point() +
  facet_wrap(~region, scales = 'free', ncol = 3) +
  geom_smooth() +
  theme(text=element_text(size = 8), 
        axis.text.x = element_text(angle = 90),
        strip.text.x = element_text(size = 8))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggsave('plots/distance_decay_tow_slope_byregion.png', width = 8, height = 8, units = 'in')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'